In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


from warnings import filterwarnings
filterwarnings('ignore')
In [2]:
data_path = r'C:\Users\kesha\Desktop\Zomato ML\zomato.csv'
from pandas import read_csv
In [3]:
df= read_csv(data_path)

# Results
print(f'Dataset shape: {df.shape}')
df.head()
Dataset shape: (51717, 17)
Out[3]:
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city)
0 https://www.zomato.com/bangalore/jalsa-banasha... 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1/5 775 080 42297555\r\n+91 9743772233 Banashankari Casual Dining Pasta, Lunch Buffet, Masala Papad, Paneer Laja... North Indian, Mughlai, Chinese 800 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari
1 https://www.zomato.com/bangalore/spice-elephan... 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1/5 787 080 41714161 Banashankari Casual Dining Momos, Lunch Buffet, Chocolate Nirvana, Thai G... Chinese, North Indian, Thai 800 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari
2 https://www.zomato.com/SanchurroBangalore?cont... 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8/5 918 +91 9663487993 Banashankari Cafe, Casual Dining Churros, Cannelloni, Minestrone Soup, Hot Choc... Cafe, Mexican, Italian 800 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari
3 https://www.zomato.com/bangalore/addhuri-udupi... 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7/5 88 +91 9620009302 Banashankari Quick Bites Masala Dosa South Indian, North Indian 300 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari
4 https://www.zomato.com/bangalore/grand-village... 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8/5 166 +91 8026612447\r\n+91 9901210005 Basavanagudi Casual Dining Panipuri, Gol Gappe North Indian, Rajasthani 600 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   url                          51717 non-null  object
 1   address                      51717 non-null  object
 2   name                         51717 non-null  object
 3   online_order                 51717 non-null  object
 4   book_table                   51717 non-null  object
 5   rate                         43942 non-null  object
 6   votes                        51717 non-null  int64 
 7   phone                        50509 non-null  object
 8   location                     51696 non-null  object
 9   rest_type                    51490 non-null  object
 10  dish_liked                   23639 non-null  object
 11  cuisines                     51672 non-null  object
 12  approx_cost(for two people)  51371 non-null  object
 13  reviews_list                 51717 non-null  object
 14  menu_item                    51717 non-null  object
 15  listed_in(type)              51717 non-null  object
 16  listed_in(city)              51717 non-null  object
dtypes: int64(1), object(16)
memory usage: 6.7+ MB
In [5]:
df.isnull().sum()
Out[5]:
url                                0
address                            0
name                               0
online_order                       0
book_table                         0
rate                            7775
votes                              0
phone                           1208
location                          21
rest_type                        227
dish_liked                     28078
cuisines                          45
approx_cost(for two people)      346
reviews_list                       0
menu_item                          0
listed_in(type)                    0
listed_in(city)                    0
dtype: int64
In [6]:
#getting nan feature
feature_na=[feature for feature in df.columns if df[feature].isnull().sum()>0]
feature_na
Out[6]:
['rate',
 'phone',
 'location',
 'rest_type',
 'dish_liked',
 'cuisines',
 'approx_cost(for two people)']
In [7]:
#% of missing values
import numpy as np
for feature in feature_na:
    print('{} has {} % missing values'.format(feature,np.round(df[feature].isnull().sum()/len(df)*100,4)))
rate has 15.0337 % missing values
phone has 2.3358 % missing values
location has 0.0406 % missing values
rest_type has 0.4389 % missing values
dish_liked has 54.2916 % missing values
cuisines has 0.087 % missing values
approx_cost(for two people) has 0.669 % missing values
In [8]:
df['approx_cost(for two people)'].dtype
Out[8]:
dtype('O')
In [9]:
df[df['approx_cost(for two people)'].isnull()]
Out[9]:
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city)
1662 https://www.zomato.com/bangalore/papa-khan-ras... 6, Abbiah Layout KC Halli Main Road, Bommanaha... Papa Khan Rasoi No No NaN 0 NaN NaN NaN NaN NaN NaN [] [] Delivery Bannerghatta Road
1768 https://www.zomato.com/bangalore/innate-jp-nag... 45, 14th Main, 7th phase, Puttehahalli Palya, ... Innate No No NaN 0 NaN JP Nagar Delivery NaN Beverages NaN [] [] Delivery Bannerghatta Road
1795 https://www.zomato.com/bangalore/super-chefs-b... 8/3, Opposite Suryadhoya Convention hall, Bann... Super Chef's (New Royal treat) No No NaN 0 NaN Bannerghatta Road Quick Bites NaN North Indian, Chinese, South Indian NaN [('Rated 1.0', 'RATED\n Many items were missi... [] Delivery Bannerghatta Road
3069 https://www.zomato.com/bangalore/super-chefs-b... 8/3, Opposite Suryadhoya Convention hall, Bann... Super Chef's (New Royal treat) No No - 0 NaN Bannerghatta Road Quick Bites NaN North Indian, Chinese, South Indian NaN [('Rated 1.0', 'RATED\n Many items were missi... [] Delivery Basavanagudi
3774 https://www.zomato.com/bangalore/the-hub-ibis-... Ibis Bengaluru, Opposite RMZ Ecospace Business... The Hub - Ibis Bengaluru No No 3.9/5 237 NaN Marathahalli Bar Dal Makhani, Cocktails, Lunch Buffet, Breakfas... Finger Food NaN [('Rated 4.0', 'RATED\n The Hub is a small ba... [] Buffet Bellandur
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
50940 https://www.zomato.com/bangalore/cinnabon-whit... Ground Floor, The Forum Neighborhood Mall, Pre... Cinnabon No No 4.0 /5 80 NaN Whitefield Quick Bites Rolls, Cinnamon Roll, Cinnamon Bun Beverages, American, Bakery NaN [('Rated 4.0', "RATED\n Who else is a Cinnabo... [] Desserts Whitefield
51233 https://www.zomato.com/bangalore/mayas-kitchen... 43/9, Borewell Rd, Palm Meadows, Nallurhalli, ... Mayas kitchen No No NaN 0 NaN Whitefield Casual Dining NaN South Indian, Chinese, Chettinad NaN [('Rated 4.0', "RATED\n Food was pretty good,... [] Dine-out Whitefield
51247 https://www.zomato.com/bangalore/mast-kalandar... D64, 33, 1st Floor, Sundari Armadale, Next To ... Mast Kalandar No No 2.6 /5 76 NaN Whitefield Casual Dining Gulab Jamun, Lassi, Dal Makhani, Buttermilk, T... North Indian NaN [('Rated 1.0', 'RATED\n Terrible food in this... [] Dine-out Whitefield
51271 https://www.zomato.com/bangalore/karavalli-foo... N254-107, 1st Main Road, Near ITPL Gate 3, KIA... Karavalli Food Court No No 3.2 /5 4 NaN Whitefield Casual Dining NaN South Indian NaN [('Rated 1.0', 'RATED\n i was a regular custo... [] Dine-out Whitefield
51641 https://www.zomato.com/bangalore/andhra-spices... 188, ITPL Main Road, Brookefields, Bangalore, ... Andhra Spices No No 3.2 /5 4 NaN Brookefield Quick Bites NaN Andhra, Chinese, South Indian NaN [('Rated 2.0', "RATED\n Food is not upto the ... [] Dine-out Whitefield

346 rows × 17 columns

In [10]:
df['approx_cost(for two people)'].unique()
Out[10]:
array(['800', '300', '600', '700', '550', '500', '450', '650', '400',
       '900', '200', '750', '150', '850', '100', '1,200', '350', '250',
       '950', '1,000', '1,500', '1,300', '199', '80', '1,100', '160',
       '1,600', '230', '130', '50', '190', '1,700', nan, '1,400', '180',
       '1,350', '2,200', '2,000', '1,800', '1,900', '330', '2,500',
       '2,100', '3,000', '2,800', '3,400', '40', '1,250', '3,500',
       '4,000', '2,400', '2,600', '120', '1,450', '469', '70', '3,200',
       '60', '560', '240', '360', '6,000', '1,050', '2,300', '4,100',
       '5,000', '3,700', '1,650', '2,700', '4,500', '140'], dtype=object)
In [11]:
# right now it has some NAN Values so it will be of float data-type,dats why very first I have to convert it into string then
#I have to remove this comma
df['approx_cost(for two people)'] = df['approx_cost(for two people)'].astype(str).apply(lambda x: x.replace(',', ''))
In [12]:
df['approx_cost(for two people)']=df['approx_cost(for two people)'].astype(float)
In [13]:
df['approx_cost(for two people)'].dtype
Out[13]:
dtype('float64')
In [14]:
df['rate'].unique()
Out[14]:
array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', nan, '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)
In [15]:
df['rate'][0].split('/')[0]
Out[15]:
'4.1'
In [16]:
def split(x):
    return x.split('/')[0]
In [17]:
df['rate'].dtype
Out[17]:
dtype('O')
In [18]:
df['rate'].isnull().sum()
Out[18]:
7775
In [19]:
# right now it has some NAN Values so it will be of float data-type,dats why very first I have to convert it into string then
# I have to split it & access 
df['rate']=df['rate'].astype(str).apply(split)
# df['rate'] = df['rate'].astype(str).apply(lambda x: x.split('/')[0])
In [20]:
df['rate'].replace('NEW',0,inplace=True)
df['rate'].replace('-',0,inplace=True)
df['rate']=df['rate'].astype(str).astype(float)
df['rate'].dtype
Out[20]:
dtype('float64')
In [21]:
import matplotlib.pyplot as plt
plt.figure(figsize=(20,12))
df['rest_type'].value_counts().nlargest(20).plot.bar(color='red')

# to provide styling to text on x-axis
plt.gcf().autofmt_xdate()
In [22]:
df.columns
Out[22]:
Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)'],
      dtype='object')
In [23]:
df['rest_type'].value_counts()
Out[23]:
Quick Bites                   19132
Casual Dining                 10330
Cafe                           3732
Delivery                       2604
Dessert Parlor                 2263
                              ...  
Dessert Parlor, Food Court        2
Bakery, Food Court                2
Food Court, Beverage Shop         2
Sweet Shop, Dessert Parlor        1
Quick Bites, Kiosk                1
Name: rest_type, Length: 93, dtype: int64
In [24]:
def mark(x):
    if x in ('Quick Bites', 'Casual Dining'):
        return 'Quick Bites + Casual Dining'
    else:
        return 'other'
In [25]:
df['Top_types']=df['rest_type'].apply(mark)
df.head()
Out[25]:
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city) Top_types
0 https://www.zomato.com/bangalore/jalsa-banasha... 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1 775 080 42297555\r\n+91 9743772233 Banashankari Casual Dining Pasta, Lunch Buffet, Masala Papad, Paneer Laja... North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari Quick Bites + Casual Dining
1 https://www.zomato.com/bangalore/spice-elephan... 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1 787 080 41714161 Banashankari Casual Dining Momos, Lunch Buffet, Chocolate Nirvana, Thai G... Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari Quick Bites + Casual Dining
2 https://www.zomato.com/SanchurroBangalore?cont... 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8 918 +91 9663487993 Banashankari Cafe, Casual Dining Churros, Cannelloni, Minestrone Soup, Hot Choc... Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari other
3 https://www.zomato.com/bangalore/addhuri-udupi... 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7 88 +91 9620009302 Banashankari Quick Bites Masala Dosa South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari Quick Bites + Casual Dining
4 https://www.zomato.com/bangalore/grand-village... 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8 166 +91 8026612447\r\n+91 9901210005 Basavanagudi Casual Dining Panipuri, Gol Gappe North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari Quick Bites + Casual Dining
In [26]:
import plotly.express as px
values=df['Top_types'].value_counts()
labels=df['Top_types'].value_counts().index
In [27]:
fig = px.pie(df, values=values, names=labels,title='Restaurants Pie chart')
fig.show()
In [28]:
df.head()
Out[28]:
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city) Top_types
0 https://www.zomato.com/bangalore/jalsa-banasha... 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1 775 080 42297555\r\n+91 9743772233 Banashankari Casual Dining Pasta, Lunch Buffet, Masala Papad, Paneer Laja... North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari Quick Bites + Casual Dining
1 https://www.zomato.com/bangalore/spice-elephan... 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1 787 080 41714161 Banashankari Casual Dining Momos, Lunch Buffet, Chocolate Nirvana, Thai G... Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari Quick Bites + Casual Dining
2 https://www.zomato.com/SanchurroBangalore?cont... 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8 918 +91 9663487993 Banashankari Cafe, Casual Dining Churros, Cannelloni, Minestrone Soup, Hot Choc... Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari other
3 https://www.zomato.com/bangalore/addhuri-udupi... 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7 88 +91 9620009302 Banashankari Quick Bites Masala Dosa South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari Quick Bites + Casual Dining
4 https://www.zomato.com/bangalore/grand-village... 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8 166 +91 8026612447\r\n+91 9901210005 Basavanagudi Casual Dining Panipuri, Gol Gappe North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari Quick Bites + Casual Dining
In [29]:
df.columns
Out[29]:
Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)', 'Top_types'],
      dtype='object')
In [30]:
df.dtypes
Out[30]:
url                             object
address                         object
name                            object
online_order                    object
book_table                      object
rate                           float64
votes                            int64
phone                           object
location                        object
rest_type                       object
dish_liked                      object
cuisines                        object
approx_cost(for two people)    float64
reviews_list                    object
menu_item                       object
listed_in(type)                 object
listed_in(city)                 object
Top_types                       object
dtype: object
In [31]:
rest=df.groupby('name').agg({'votes': 'sum','url': 'count','approx_cost(for two people)': 'mean','rate': 'mean'}).reset_index()
rest
Out[31]:
name votes url approx_cost(for two people) rate
0 #FeelTheROLL 14 2 200.0 3.400000
1 #L-81 Cafe 432 9 400.0 3.900000
2 #Vibes Restro 0 3 700.0 NaN
3 #refuel 111 3 400.0 3.700000
4 'Brahmins' Thatte Idli 0 1 100.0 NaN
... ... ... ... ... ...
8787 late100 0 5 200.0 NaN
8788 nu.tree 1443 8 400.0 4.314286
8789 re:cess - Hilton Bangalore Embassy GolfLinks 438 3 1200.0 4.100000
8790 repEAT Hub 0 2 200.0 NaN
8791 sCoolMeal 0 5 300.0 NaN

8792 rows × 5 columns

In [32]:
rest.columns = ['name', 'total_votes', 'total_unities', 'avg_approx_cost', 'mean_rating']
rest.head()
Out[32]:
name total_votes total_unities avg_approx_cost mean_rating
0 #FeelTheROLL 14 2 200.0 3.4
1 #L-81 Cafe 432 9 400.0 3.9
2 #Vibes Restro 0 3 700.0 NaN
3 #refuel 111 3 400.0 3.7
4 'Brahmins' Thatte Idli 0 1 100.0 NaN
In [33]:
rest['votes_per_unity'] = rest['total_votes'] / rest['total_unities']
rest.head()
Out[33]:
name total_votes total_unities avg_approx_cost mean_rating votes_per_unity
0 #FeelTheROLL 14 2 200.0 3.4 7.0
1 #L-81 Cafe 432 9 400.0 3.9 48.0
2 #Vibes Restro 0 3 700.0 NaN 0.0
3 #refuel 111 3 400.0 3.7 37.0
4 'Brahmins' Thatte Idli 0 1 100.0 NaN 0.0
In [34]:
popular=rest.sort_values(by='total_unities', ascending=False)
popular
Out[34]:
name total_votes total_unities avg_approx_cost mean_rating votes_per_unity
1320 Cafe Coffee Day 3089 96 844.791667 3.147191 32.177083
5549 Onesta 347520 85 600.000000 4.410588 4088.470588
3788 Just Bake 2898 73 400.000000 3.355882 39.698630
2446 Empire Restaurant 229808 71 685.211268 3.916901 3236.732394
2577 Five Star Chicken 3134 70 257.857143 3.425000 44.771429
... ... ... ... ... ... ...
1900 Cool Break 11 1 150.000000 3.400000 11.000000
8076 The Shake Factory Originals 8 1 200.000000 3.300000 8.000000
5216 Nethravathi Military Hotel 0 1 200.000000 NaN 0.000000
7426 Swadisht North Indian Restaurant 23 1 200.000000 3.200000 23.000000
5375 Night Punjabi Folk 0 1 200.000000 NaN 0.000000

8792 rows × 6 columns

In [35]:
popular['name'].nunique()
Out[35]:
8792
In [36]:
popular.shape
Out[36]:
(8792, 6)
In [37]:
import seaborn as sns
# Creating a figure for restaurants overview analysis
fig, (ax1,ax2,ax3) = plt.subplots(3,1, figsize=(20,30))

# Plot Pack 01 - Most popular restaurants (votes)

# Annotations
ax1.text(0.50, 0.30, int(popular['total_votes'].mean()), fontsize=45, ha='center')
ax1.text(0.50, 0.12, 'is the average of votes', fontsize=12, ha='center')
ax1.text(0.50, 0.00, 'received by restaurants', fontsize=12, ha='center')
ax1.axis('off')

sns.barplot(x='total_votes', y='name', data=popular.sort_values(by='total_votes', ascending=False)[0:5],ax=ax2, palette='plasma')
ax2.set_title('Top 5 Most Voted Restaurants', size=12)

sns.barplot(x='total_votes', y='name', data=popular.sort_values(by='total_votes', ascending=False).query('total_votes > 0').tail(),ax=ax3, palette='plasma_r')
ax3.set_title('Top 5 Less Voted Restaurants\n(with at least 1 vote)', size=12)
Out[37]:
Text(0.5, 1.0, 'Top 5 Less Voted Restaurants\n(with at least 1 vote)')
In [38]:
popular.columns
Out[38]:
Index(['name', 'total_votes', 'total_unities', 'avg_approx_cost',
       'mean_rating', 'votes_per_unity'],
      dtype='object')
In [39]:
popular.head()
Out[39]:
name total_votes total_unities avg_approx_cost mean_rating votes_per_unity
1320 Cafe Coffee Day 3089 96 844.791667 3.147191 32.177083
5549 Onesta 347520 85 600.000000 4.410588 4088.470588
3788 Just Bake 2898 73 400.000000 3.355882 39.698630
2446 Empire Restaurant 229808 71 685.211268 3.916901 3236.732394
2577 Five Star Chicken 3134 70 257.857143 3.425000 44.771429
In [40]:
fig, (ax1,ax2,ax3) = plt.subplots(3,1, figsize=(20,30))
# Annotations
import numpy as np
ax1.text(0.50, 0.30, np.round(popular['avg_approx_cost'].mean(), 2), fontsize=45, ha='center')
ax1.text(0.50, 0.12, 'is mean approx cost', fontsize=12, ha='center')
ax1.text(0.50, 0.00, 'for Bengaluru restaurants', fontsize=12, ha='center')
ax1.axis('off')

sns.barplot(x='avg_approx_cost', y='name', data=popular.sort_values(by='avg_approx_cost', ascending=False)[0:5],ax=ax2, palette='plasma')
ax2.set_title('Top 5 Most Expensives Restaurants', size=12)

sns.barplot(x='avg_approx_cost', y='name', data=popular.sort_values(by='avg_approx_cost', ascending=False).query('avg_approx_cost > 0').tail(),ax=ax3, palette='plasma_r')
ax3.set_title('Top 5 Less Expensive Restaurants', size=12)
Out[40]:
Text(0.5, 1.0, 'Top 5 Less Expensive Restaurants')
In [41]:
import plotly.graph_objs as go
from plotly.offline import iplot
x=df['book_table'].value_counts()
labels=['not book','book']
In [42]:
trace=go.Pie(labels=labels, values=x,
               hoverinfo='label+percent', textinfo='percent', 
               textfont=dict(size=25),
              pull=[0, 0, 0,0.2, 0]
               )
iplot([trace])
In [43]:
import plotly.express as px
x=df['online_order'].value_counts()
labels=['accepted','not accepted']
In [44]:
fig = px.pie(df, values=x, names=labels,title='Pie chart')
fig.show()
In [45]:
def return_budget(location,restaurant):
    budget=df[(df['approx_cost(for two people)']<=400) & (df['location']==location) & 
                     (df['rate']>4) & (df['rest_type']==restaurant)]
    return(budget['name'].unique())
In [46]:
return_budget('BTM',"Quick Bites")
Out[46]:
array(['Swadista Aahar', 'Litti Twist', 'The Shawarma Shop', 'Gorbandh',
       'Yum In My Tum', 'Chaatimes', "Muthashy's", 'Swad Punjab Da',
       "Domino's Pizza", 'Roti Wala', 'Andhra Kitchen'], dtype=object)
In [47]:
locations=pd.DataFrame({"Name":df['location'].unique()})
In [48]:
# For Bangalore
locations['new_Name']='Bangalore '+locations['Name']
In [49]:
locations.head()
Out[49]:
Name new_Name
0 Banashankari Bangalore Banashankari
1 Basavanagudi Bangalore Basavanagudi
2 Mysore Road Bangalore Mysore Road
3 Jayanagar Bangalore Jayanagar
4 Kumaraswamy Layout Bangalore Kumaraswamy Layout
In [50]:
!pip install geopy
Collecting geopy
  Downloading geopy-2.2.0-py3-none-any.whl (118 kB)
Collecting geographiclib<2,>=1.49
  Downloading geographiclib-1.52-py3-none-any.whl (38 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.52 geopy-2.2.0
WARNING: You are using pip version 21.1.2; however, version 21.2.1 is available.
You should consider upgrading via the 'c:\users\kesha\anaconda3\python.exe -m pip install --upgrade pip' command.
In [51]:
from geopy.geocoders import Nominatim
In [52]:
lat=[]
lon=[]
geolocator=Nominatim(user_agent="app")
for location in locations['Name']:
    location = geolocator.geocode(location)
    if location is None:
        lat.append(np.nan)
        lon.append(np.nan)
    else:    
        lat.append(location.latitude)
        lon.append(location.longitude)
In [53]:
locations['latitude']=lat
locations['longitude']=lon
In [54]:
locations.to_csv('zomato_locations.csv',index=False)
In [55]:
Rest_locations=pd.DataFrame(df['location'].value_counts().reset_index())
In [56]:
Rest_locations.columns=['Name','count']
Rest_locations.head()
Out[56]:
Name count
0 BTM 5124
1 HSR 2523
2 Koramangala 5th Block 2504
3 JP Nagar 2235
4 Whitefield 2144
In [57]:
Restaurant_locations=Rest_locations.merge(locations,on='Name',how="left").dropna()
Restaurant_locations.head()
Out[57]:
Name count new_Name latitude longitude
0 BTM 5124 Bangalore BTM 45.954851 -112.496595
1 HSR 2523 Bangalore HSR 18.147500 41.538889
2 Koramangala 5th Block 2504 Bangalore Koramangala 5th Block 12.934377 77.628415
3 JP Nagar 2235 Bangalore JP Nagar 12.265594 76.646540
4 Whitefield 2144 Bangalore Whitefield 44.373058 -71.611858
In [58]:
def generateBaseMap(default_location=[12.97, 77.59], default_zoom_start=12):
    base_map = folium.Map(location=default_location, zoom_start=default_zoom_start)
    return base_map
In [59]:
import folium
from folium.plugins import HeatMap
basemap=generateBaseMap()
In [61]:
data=df[df['rest_type']=='Quick Bites']
In [62]:
data['dish_liked']
Out[62]:
3                                              Masala Dosa
31       Burgers, Lassi, Chicken Grill, Naan, Momos, Ch...
34       Rolls, Veggie Wrap, Chocolate Fantasy, Rice Bo...
36                                                     NaN
39                                                     NaN
                               ...                        
51641                                                  NaN
51642                                                  NaN
51643                                                  NaN
51644                                                  NaN
51645                                              Burgers
Name: dish_liked, Length: 19132, dtype: object
In [64]:
df['reviews_list'][0]
data=df['reviews_list'][0].lower()
data
Out[64]:
'[(\'rated 4.0\', \'rated\\n  a beautiful place to dine in.the interiors take you back to the mughal era. the lightings are just perfect.we went there on the occasion of christmas and so they had only limited items available. but the taste and service was not compromised at all.the only complaint is that the breads could have been better.would surely like to come here again.\'), (\'rated 4.0\', \'rated\\n  i was here for dinner with my family on a weekday. the restaurant was completely empty. ambience is good with some good old hindi music. seating arrangement are good too. we ordered masala papad, panner and baby corn starters, lemon and corrionder soup, butter roti, olive and chilli paratha. food was fresh and good, service is good too. good for family hangout.\\ncheers\'), (\'rated 2.0\', \'rated\\n  its a restaurant near to banashankari bda. me along with few of my office friends visited to have buffet but unfortunately they only provide veg buffet. on inquiring they said this place is mostly visited by vegetarians. anyways we ordered ala carte items which took ages to come. food was ok ok. definitely not visiting anymore.\'), (\'rated 4.0\', \'rated\\n  we went here on a weekend and one of us had the buffet while two of us took ala carte. firstly the ambience and service of this place is great! the buffet had a lot of items and the good was good. we had a pumpkin halwa intm the dessert which was amazing. must try! the kulchas are great here. cheers!\'), (\'rated 5.0\', \'rated\\n  the best thing about the place is itã\x83\\x83ã\x82\\x83ã\x83\\x82ã\x82\\x82ã\x83\\x83ã\x82\\x82ã\x83\\x82ã\x82\\x92s ambiance. second best thing was yummy ? food. we try buffet and buffet food was not disappointed us.\\ntest ?. ?? ?? ?? ?? ??\\nquality ?. ??????????.\\nservice: staff was very professional and friendly.\\n\\noverall experience was excellent.\\n\\nsubirmajumder85.wixsite.com\'), (\'rated 5.0\', \'rated\\n  great food and pleasant ambience. expensive but coll place to chill and relax......\\n\\nservice is really very very good and friendly staff...\\n\\nfood : 5/5\\nservice : 5/5\\nambience :5/5\\noverall :5/5\'), (\'rated 4.0\', \'rated\\n  good ambience with tasty food.\\ncheese chilli paratha with bhutta palak methi curry is a good combo.\\nlemon chicken in the starters is a must try item.\\negg fried rice was also quite tasty.\\nin the mocktails, recommend "alice in junoon". do not miss it.\'), (\'rated 4.0\', \'rated\\n  you canã\x83\\x83ã\x82\\x83ã\x83\\x82ã\x82\\x82ã\x83\\x83ã\x82\\x82ã\x83\\x82ã\x82\\x92t go wrong with jalsa. never been a fan of their buffet and thus always order alacarteã\x83\\x83ã\x82\\x83ã\x83\\x82ã\x82\\x82ã\x83\\x83ã\x82\\x82ã\x83\\x82ã\x82\\x92. service at times can be on the slower side but food is worth the wait.\'), (\'rated 5.0\', \'rated\\n  overdelighted by the service and food provided at this place. a royal and ethnic atmosphere builds a strong essence of being in india and also the quality and taste of food is truly authentic. i would totally recommend to visit this place once.\'), (\'rated 4.0\', \'rated\\n  the place is nice and comfortable. food wise all jalea outlets maintain a good standard. the soya chaap was a standout dish. clearly one of trademark dish as per me and a must try.\\n\\nthe only concern is the parking. it very congested and limited to just 5cars. the basement parking is very steep and makes it cumbersome\'), (\'rated 4.0\', \'rated\\n  the place is nice and comfortable. food wise all jalea outlets maintain a good standard. the soya chaap was a standout dish. clearly one of trademark dish as per me and a must try.\\n\\nthe only concern is the parking. it very congested and limited to just 5cars. the basement parking is very steep and makes it cumbersome\'), (\'rated 4.0\', \'rated\\n  the place is nice and comfortable. food wise all jalea outlets maintain a good standard. the soya chaap was a standout dish. clearly one of trademark dish as per me and a must try.\\n\\nthe only concern is the parking. it very congested and limited to just 5cars. the basement parking is very steep and makes it cumbersome\')]'
In [65]:
import re
data2=re.sub('[^a-zA-Z]', ' ',data)
data2
Out[65]:
'   rated        rated n  a beautiful place to dine in the interiors take you back to the mughal era  the lightings are just perfect we went there on the occasion of christmas and so they had only limited items available  but the taste and service was not compromised at all the only complaint is that the breads could have been better would surely like to come here again       rated        rated n  i was here for dinner with my family on a weekday  the restaurant was completely empty  ambience is good with some good old hindi music  seating arrangement are good too  we ordered masala papad  panner and baby corn starters  lemon and corrionder soup  butter roti  olive and chilli paratha  food was fresh and good  service is good too  good for family hangout  ncheers      rated        rated n  its a restaurant near to banashankari bda  me along with few of my office friends visited to have buffet but unfortunately they only provide veg buffet  on inquiring they said this place is mostly visited by vegetarians  anyways we ordered ala carte items which took ages to come  food was ok ok  definitely not visiting anymore       rated        rated n  we went here on a weekend and one of us had the buffet while two of us took ala carte  firstly the ambience and service of this place is great  the buffet had a lot of items and the good was good  we had a pumpkin halwa intm the dessert which was amazing  must try  the kulchas are great here  cheers       rated        rated n  the best thing about the place is it   x     x     x     x     x     x     x     x  s ambiance  second best thing was yummy   food  we try buffet and buffet food was not disappointed us  ntest                   nquality                nservice  staff was very professional and friendly  n noverall experience was excellent  n nsubirmajumder   wixsite com      rated        rated n  great food and pleasant ambience  expensive but coll place to chill and relax       n nservice is really very very good and friendly staff    n nfood       nservice       nambience      noverall           rated        rated n  good ambience with tasty food  ncheese chilli paratha with bhutta palak methi curry is a good combo  nlemon chicken in the starters is a must try item  negg fried rice was also quite tasty  nin the mocktails  recommend  alice in junoon   do not miss it       rated        rated n  you can   x     x     x     x     x     x     x     x  t go wrong with jalsa  never been a fan of their buffet and thus always order alacarte   x     x     x     x     x     x     x     x    service at times can be on the slower side but food is worth the wait       rated        rated n  overdelighted by the service and food provided at this place  a royal and ethnic atmosphere builds a strong essence of being in india and also the quality and taste of food is truly authentic  i would totally recommend to visit this place once       rated        rated n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome      rated        rated n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome      rated        rated n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome   '
In [66]:
data3=re.sub('rated', ' ',data2)
data3
Out[66]:
'              n  a beautiful place to dine in the interiors take you back to the mughal era  the lightings are just perfect we went there on the occasion of christmas and so they had only limited items available  but the taste and service was not compromised at all the only complaint is that the breads could have been better would surely like to come here again                  n  i was here for dinner with my family on a weekday  the restaurant was completely empty  ambience is good with some good old hindi music  seating arrangement are good too  we ordered masala papad  panner and baby corn starters  lemon and corrionder soup  butter roti  olive and chilli paratha  food was fresh and good  service is good too  good for family hangout  ncheers                 n  its a restaurant near to banashankari bda  me along with few of my office friends visited to have buffet but unfortunately they only provide veg buffet  on inquiring they said this place is mostly visited by vegetarians  anyways we ordered ala carte items which took ages to come  food was ok ok  definitely not visiting anymore                  n  we went here on a weekend and one of us had the buffet while two of us took ala carte  firstly the ambience and service of this place is great  the buffet had a lot of items and the good was good  we had a pumpkin halwa intm the dessert which was amazing  must try  the kulchas are great here  cheers                  n  the best thing about the place is it   x     x     x     x     x     x     x     x  s ambiance  second best thing was yummy   food  we try buffet and buffet food was not disappointed us  ntest                   nquality                nservice  staff was very professional and friendly  n noverall experience was excellent  n nsubirmajumder   wixsite com                 n  great food and pleasant ambience  expensive but coll place to chill and relax       n nservice is really very very good and friendly staff    n nfood       nservice       nambience      noverall                      n  good ambience with tasty food  ncheese chilli paratha with bhutta palak methi curry is a good combo  nlemon chicken in the starters is a must try item  negg fried rice was also quite tasty  nin the mocktails  recommend  alice in junoon   do not miss it                  n  you can   x     x     x     x     x     x     x     x  t go wrong with jalsa  never been a fan of their buffet and thus always order alacarte   x     x     x     x     x     x     x     x    service at times can be on the slower side but food is worth the wait                  n  overdelighted by the service and food provided at this place  a royal and ethnic atmosphere builds a strong essence of being in india and also the quality and taste of food is truly authentic  i would totally recommend to visit this place once                  n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome                 n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome                 n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome   '
In [67]:
data4=re.sub('x',' ',data3)
data4
Out[67]:
'              n  a beautiful place to dine in the interiors take you back to the mughal era  the lightings are just perfect we went there on the occasion of christmas and so they had only limited items available  but the taste and service was not compromised at all the only complaint is that the breads could have been better would surely like to come here again                  n  i was here for dinner with my family on a weekday  the restaurant was completely empty  ambience is good with some good old hindi music  seating arrangement are good too  we ordered masala papad  panner and baby corn starters  lemon and corrionder soup  butter roti  olive and chilli paratha  food was fresh and good  service is good too  good for family hangout  ncheers                 n  its a restaurant near to banashankari bda  me along with few of my office friends visited to have buffet but unfortunately they only provide veg buffet  on inquiring they said this place is mostly visited by vegetarians  anyways we ordered ala carte items which took ages to come  food was ok ok  definitely not visiting anymore                  n  we went here on a weekend and one of us had the buffet while two of us took ala carte  firstly the ambience and service of this place is great  the buffet had a lot of items and the good was good  we had a pumpkin halwa intm the dessert which was amazing  must try  the kulchas are great here  cheers                  n  the best thing about the place is it                                                s ambiance  second best thing was yummy   food  we try buffet and buffet food was not disappointed us  ntest                   nquality                nservice  staff was very professional and friendly  n noverall e perience was e cellent  n nsubirmajumder   wi site com                 n  great food and pleasant ambience  e pensive but coll place to chill and rela        n nservice is really very very good and friendly staff    n nfood       nservice       nambience      noverall                      n  good ambience with tasty food  ncheese chilli paratha with bhutta palak methi curry is a good combo  nlemon chicken in the starters is a must try item  negg fried rice was also quite tasty  nin the mocktails  recommend  alice in junoon   do not miss it                  n  you can                                                t go wrong with jalsa  never been a fan of their buffet and thus always order alacarte                                                  service at times can be on the slower side but food is worth the wait                  n  overdelighted by the service and food provided at this place  a royal and ethnic atmosphere builds a strong essence of being in india and also the quality and taste of food is truly authentic  i would totally recommend to visit this place once                  n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome                 n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome                 n  the place is nice and comfortable  food wise all jalea outlets maintain a good standard  the soya chaap was a standout dish  clearly one of trademark dish as per me and a must try  n nthe only concern is the parking  it very congested and limited to just  cars  the basement parking is very steep and makes it cumbersome   '
In [68]:
re.sub(' +',' ',data4)
Out[68]:
' n a beautiful place to dine in the interiors take you back to the mughal era the lightings are just perfect we went there on the occasion of christmas and so they had only limited items available but the taste and service was not compromised at all the only complaint is that the breads could have been better would surely like to come here again n i was here for dinner with my family on a weekday the restaurant was completely empty ambience is good with some good old hindi music seating arrangement are good too we ordered masala papad panner and baby corn starters lemon and corrionder soup butter roti olive and chilli paratha food was fresh and good service is good too good for family hangout ncheers n its a restaurant near to banashankari bda me along with few of my office friends visited to have buffet but unfortunately they only provide veg buffet on inquiring they said this place is mostly visited by vegetarians anyways we ordered ala carte items which took ages to come food was ok ok definitely not visiting anymore n we went here on a weekend and one of us had the buffet while two of us took ala carte firstly the ambience and service of this place is great the buffet had a lot of items and the good was good we had a pumpkin halwa intm the dessert which was amazing must try the kulchas are great here cheers n the best thing about the place is it s ambiance second best thing was yummy food we try buffet and buffet food was not disappointed us ntest nquality nservice staff was very professional and friendly n noverall e perience was e cellent n nsubirmajumder wi site com n great food and pleasant ambience e pensive but coll place to chill and rela n nservice is really very very good and friendly staff n nfood nservice nambience noverall n good ambience with tasty food ncheese chilli paratha with bhutta palak methi curry is a good combo nlemon chicken in the starters is a must try item negg fried rice was also quite tasty nin the mocktails recommend alice in junoon do not miss it n you can t go wrong with jalsa never been a fan of their buffet and thus always order alacarte service at times can be on the slower side but food is worth the wait n overdelighted by the service and food provided at this place a royal and ethnic atmosphere builds a strong essence of being in india and also the quality and taste of food is truly authentic i would totally recommend to visit this place once n the place is nice and comfortable food wise all jalea outlets maintain a good standard the soya chaap was a standout dish clearly one of trademark dish as per me and a must try n nthe only concern is the parking it very congested and limited to just cars the basement parking is very steep and makes it cumbersome n the place is nice and comfortable food wise all jalea outlets maintain a good standard the soya chaap was a standout dish clearly one of trademark dish as per me and a must try n nthe only concern is the parking it very congested and limited to just cars the basement parking is very steep and makes it cumbersome n the place is nice and comfortable food wise all jalea outlets maintain a good standard the soya chaap was a standout dish clearly one of trademark dish as per me and a must try n nthe only concern is the parking it very congested and limited to just cars the basement parking is very steep and makes it cumbersome '
In [69]:
dataset=df[df['rest_type']=='Quick Bites']
type(dataset['reviews_list'][3])
Out[69]:
str
In [70]:
total_review=' '
for review in dataset['reviews_list']:
    review=review.lower()
    review=re.sub('[^a-zA-Z]', ' ',review)
    review=re.sub('rated', ' ',review)
    review=re.sub('x',' ',review)
    review=re.sub(' +',' ',review)
    total_review=total_review + str(review)
 
In [ ]:
 
In [72]:
df.columns
Out[72]:
Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)', 'Top_types'],
      dtype='object')
In [73]:
df['rate'].unique()
Out[73]:
array([4.1, 3.8, 3.7, 3.6, 4.6, 4. , 4.2, 3.9, 3.1, 3. , 3.2, 3.3, 2.8,
       4.4, 4.3, 0. , 2.9, 3.5, nan, 2.6, 3.4, 4.5, 2.5, 2.7, 4.7, 2.4,
       2.2, 2.3, 4.8, 4.9, 2.1, 2. , 1.8])
In [74]:
# Splitting restaurants
def assign(x):
    if x>0:
        return 1
    else:
        return 0
df['rated']=df['rate'].apply(assign)
In [75]:
df['rated'].unique()
Out[75]:
array([1, 0], dtype=int64)
In [76]:
new_restaurants = df[df['rated'] == 0]
train_val_restaurants = df.query('rated == 1')
In [77]:
train_val_restaurants.head()
Out[77]:
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city) Top_types rated
0 https://www.zomato.com/bangalore/jalsa-banasha... 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1 775 080 42297555\r\n+91 9743772233 Banashankari Casual Dining Pasta, Lunch Buffet, Masala Papad, Paneer Laja... North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari Quick Bites + Casual Dining 1
1 https://www.zomato.com/bangalore/spice-elephan... 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1 787 080 41714161 Banashankari Casual Dining Momos, Lunch Buffet, Chocolate Nirvana, Thai G... Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari Quick Bites + Casual Dining 1
2 https://www.zomato.com/SanchurroBangalore?cont... 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8 918 +91 9663487993 Banashankari Cafe, Casual Dining Churros, Cannelloni, Minestrone Soup, Hot Choc... Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari other 1
3 https://www.zomato.com/bangalore/addhuri-udupi... 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7 88 +91 9620009302 Banashankari Quick Bites Masala Dosa South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari Quick Bites + Casual Dining 1
4 https://www.zomato.com/bangalore/grand-village... 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8 166 +91 8026612447\r\n+91 9901210005 Basavanagudi Casual Dining Panipuri, Gol Gappe North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari Quick Bites + Casual Dining 1
In [78]:
train_val_restaurants['rate'].unique()
Out[78]:
array([4.1, 3.8, 3.7, 3.6, 4.6, 4. , 4.2, 3.9, 3.1, 3. , 3.2, 3.3, 2.8,
       4.4, 4.3, 2.9, 3.5, 2.6, 3.4, 4.5, 2.5, 2.7, 4.7, 2.4, 2.2, 2.3,
       4.8, 4.9, 2.1, 2. , 1.8])
In [79]:
# Defining a custom threshold for splitting restaurants into good and bad
threshold = 3.75
train_val_restaurants['target'] = train_val_restaurants['rate'].apply(lambda x: 1 if x >= threshold else 0)
In [80]:
train_val_restaurants.head()
Out[80]:
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city) Top_types rated target
0 https://www.zomato.com/bangalore/jalsa-banasha... 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1 775 080 42297555\r\n+91 9743772233 Banashankari Casual Dining Pasta, Lunch Buffet, Masala Papad, Paneer Laja... North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari Quick Bites + Casual Dining 1 1
1 https://www.zomato.com/bangalore/spice-elephan... 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1 787 080 41714161 Banashankari Casual Dining Momos, Lunch Buffet, Chocolate Nirvana, Thai G... Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari Quick Bites + Casual Dining 1 1
2 https://www.zomato.com/SanchurroBangalore?cont... 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8 918 +91 9663487993 Banashankari Cafe, Casual Dining Churros, Cannelloni, Minestrone Soup, Hot Choc... Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari other 1 1
3 https://www.zomato.com/bangalore/addhuri-udupi... 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7 88 +91 9620009302 Banashankari Quick Bites Masala Dosa South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari Quick Bites + Casual Dining 1 0
4 https://www.zomato.com/bangalore/grand-village... 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8 166 +91 8026612447\r\n+91 9901210005 Basavanagudi Casual Dining Panipuri, Gol Gappe North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari Quick Bites + Casual Dining 1 1
In [81]:
import matplotlib.pyplot as plt
x=train_val_restaurants['target'].value_counts()
labels=x.index
print(x)
plt.pie(x,explode=[0.0,0.1],autopct='%1.1f%%')
0    21421
1    20244
Name: target, dtype: int64
Out[81]:
([<matplotlib.patches.Wedge at 0x1bd200f1250>,
  <matplotlib.patches.Wedge at 0x1bd200f1850>],
 [Text(-0.04879500100763322, 1.0989172161162393, ''),
  Text(0.05323091019014493, -1.1988187812177156, '')],
 [Text(-0.026615455095072665, 0.5994093906088577, '51.4%'),
  Text(0.031051364277584537, -0.699310955710334, '48.6%')])
In [82]:
train_val_restaurants.columns
Out[82]:
Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)', 'Top_types', 'rated', 'target'],
      dtype='object')
In [83]:
train_val_restaurants.head()
Out[83]:
url address name online_order book_table rate votes phone location rest_type dish_liked cuisines approx_cost(for two people) reviews_list menu_item listed_in(type) listed_in(city) Top_types rated target
0 https://www.zomato.com/bangalore/jalsa-banasha... 942, 21st Main Road, 2nd Stage, Banashankari, ... Jalsa Yes Yes 4.1 775 080 42297555\r\n+91 9743772233 Banashankari Casual Dining Pasta, Lunch Buffet, Masala Papad, Paneer Laja... North Indian, Mughlai, Chinese 800.0 [('Rated 4.0', 'RATED\n A beautiful place to ... [] Buffet Banashankari Quick Bites + Casual Dining 1 1
1 https://www.zomato.com/bangalore/spice-elephan... 2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ... Spice Elephant Yes No 4.1 787 080 41714161 Banashankari Casual Dining Momos, Lunch Buffet, Chocolate Nirvana, Thai G... Chinese, North Indian, Thai 800.0 [('Rated 4.0', 'RATED\n Had been here for din... [] Buffet Banashankari Quick Bites + Casual Dining 1 1
2 https://www.zomato.com/SanchurroBangalore?cont... 1112, Next to KIMS Medical College, 17th Cross... San Churro Cafe Yes No 3.8 918 +91 9663487993 Banashankari Cafe, Casual Dining Churros, Cannelloni, Minestrone Soup, Hot Choc... Cafe, Mexican, Italian 800.0 [('Rated 3.0', "RATED\n Ambience is not that ... [] Buffet Banashankari other 1 1
3 https://www.zomato.com/bangalore/addhuri-udupi... 1st Floor, Annakuteera, 3rd Stage, Banashankar... Addhuri Udupi Bhojana No No 3.7 88 +91 9620009302 Banashankari Quick Bites Masala Dosa South Indian, North Indian 300.0 [('Rated 4.0', "RATED\n Great food and proper... [] Buffet Banashankari Quick Bites + Casual Dining 1 0
4 https://www.zomato.com/bangalore/grand-village... 10, 3rd Floor, Lakshmi Associates, Gandhi Baza... Grand Village No No 3.8 166 +91 8026612447\r\n+91 9901210005 Basavanagudi Casual Dining Panipuri, Gol Gappe North Indian, Rajasthani 600.0 [('Rated 4.0', 'RATED\n Very good restaurant ... [] Buffet Banashankari Quick Bites + Casual Dining 1 1
In [84]:
# train_val_restaurants['total_cuisines'] = train_val_restaurants['cuisines'].astype(str).apply(lambda x: len(x.split(',')))

def count(x):
    return len(x.split(','))
In [85]:
#as it have some NAN value that why very first I have to convert into str  &  then apply a function
train_val_restaurants['total_cuisines']=train_val_restaurants['cuisines'].astype(str).apply(count)
train_val_restaurants['multiple_types']=train_val_restaurants['rest_type'].astype(str).apply(count)
In [86]:
train_val_restaurants.columns
Out[86]:
Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)', 'Top_types', 'rated', 'target',
       'total_cuisines', 'multiple_types'],
      dtype='object')
In [87]:
imp_features=['online_order','book_table','location','rest_type','multiple_types','total_cuisines','listed_in(type)', 'listed_in(city)','approx_cost(for two people)','target']
In [88]:
data = train_val_restaurants[imp_features]
In [89]:
data.isnull().sum()
Out[89]:
online_order                     0
book_table                       0
location                         0
rest_type                      149
multiple_types                   0
total_cuisines                   0
listed_in(type)                  0
listed_in(city)                  0
approx_cost(for two people)    247
target                           0
dtype: int64
In [90]:
data.dropna(how='any',inplace=True)
In [91]:
data.isnull().sum()
Out[91]:
online_order                   0
book_table                     0
location                       0
rest_type                      0
multiple_types                 0
total_cuisines                 0
listed_in(type)                0
listed_in(city)                0
approx_cost(for two people)    0
target                         0
dtype: int64
In [92]:
# Splitting features by data type
cat_features= [col for col in data.columns if data[col].dtype == 'O']
num_features= [col for col in data.columns if data[col].dtype != 'O']
In [93]:
cat_features
Out[93]:
['online_order',
 'book_table',
 'location',
 'rest_type',
 'listed_in(type)',
 'listed_in(city)']
In [94]:
for feature in cat_features:
    print('{} has total {} unique features'.format(feature, data[feature].nunique()))
online_order has total 2 unique features
book_table has total 2 unique features
location has total 92 unique features
rest_type has total 87 unique features
listed_in(type) has total 7 unique features
listed_in(city) has total 30 unique features
In [95]:
data.shape
Out[95]:
(41271, 10)
In [96]:
cols=['location','rest_type','listed_in(city)']
for col in cols:
    print('Total feature in {} are {}'.format(col,data[col].nunique()))
    print(data[col].value_counts()/(len(data))*100)
    print('\n')
Total feature in location are 92
BTM                      9.398851
Koramangala 5th Block    5.565651
HSR                      4.829057
Indiranagar              4.361416
JP Nagar                 4.143345
                           ...   
Yelahanka                0.009692
West Bangalore           0.007269
Rajarajeshwari Nagar     0.004846
Nagarbhavi               0.002423
Peenya                   0.002423
Name: location, Length: 92, dtype: float64


Total feature in rest_type are 87
Quick Bites              33.643478
Casual Dining            23.299654
Cafe                      8.163117
Dessert Parlor            4.482566
Delivery                  4.048848
                           ...    
Cafe, Food Court          0.004846
Dessert Parlor, Kiosk     0.004846
Bakery, Food Court        0.004846
Bakery, Beverage Shop     0.002423
Quick Bites, Kiosk        0.002423
Name: rest_type, Length: 87, dtype: float64


Total feature in listed_in(city) are 30
BTM                      6.265901
Koramangala 7th Block    5.720724
Koramangala 4th Block    5.461462
Koramangala 5th Block    5.451770
Koramangala 6th Block    5.139202
Jayanagar                4.647331
JP Nagar                 3.959197
Indiranagar              3.726588
Church Street            3.678127
MG Road                  3.668435
Brigade Road             3.593322
Lavelle Road             3.515786
Residency Road           3.258947
HSR                      3.222602
Marathahalli             3.137797
Bannerghatta Road        2.926995
Whitefield               2.914880
Old Airport Road         2.842189
Brookefield              2.762230
Basavanagudi             2.597466
Kammanahalli             2.473892
Sarjapur Road            2.466623
Kalyan Nagar             2.408471
Bellandur                2.316397
Frazer Town              2.309127
Malleshwaram             2.292166
Rajajinagar              2.112864
Electronic City          1.986867
Banashankari             1.763950
New BEL Road             1.378692
Name: listed_in(city), dtype: float64


In [98]:
percent=data['location'].value_counts()/len(data)*100
values=percent.values
len(values[values>0.4])
Out[98]:
46
In [99]:
# set Threshold value 0.4 
values=data['location'].value_counts()/len(data)*100
values
Out[99]:
BTM                      9.398851
Koramangala 5th Block    5.565651
HSR                      4.829057
Indiranagar              4.361416
JP Nagar                 4.143345
                           ...   
Yelahanka                0.009692
West Bangalore           0.007269
Rajarajeshwari Nagar     0.004846
Nagarbhavi               0.002423
Peenya                   0.002423
Name: location, Length: 92, dtype: float64
In [100]:
threshold=0.4
imp=values[values>threshold]
imp
Out[100]:
BTM                      9.398851
Koramangala 5th Block    5.565651
HSR                      4.829057
Indiranagar              4.361416
JP Nagar                 4.143345
Jayanagar                3.959197
Whitefield               3.808970
Marathahalli             3.416443
Bannerghatta Road        2.970609
Koramangala 7th Block    2.556274
Koramangala 6th Block    2.553851
Brigade Road             2.549005
Bellandur                2.415740
Sarjapur Road            2.069250
Koramangala 1st Block    2.064404
Ulsoor                   2.057135
Koramangala 4th Block    2.037750
Electronic City          2.020789
MG Road                  1.921446
Banashankari             1.802719
Kalyan Nagar             1.681568
Malleshwaram             1.553149
Residency Road           1.463497
Richmond Road            1.463497
Basavanagudi             1.441690
Frazer Town              1.371423
Church Street            1.322963
Brookefield              1.315694
New BEL Road             1.226042
Kammanahalli             1.194543
Lavelle Road             1.165467
Cunningham Road          1.150929
Banaswadi                1.133968
Rajajinagar              1.117007
Domlur                   0.983742
Shanti Nagar             0.891667
Old Airport Road         0.855322
St. Marks Road           0.831092
Shivajinagar             0.671173
Commercial Street        0.654212
Ejipura                  0.620290
Jeevan Bhima Nagar       0.598483
Vasanth Nagar            0.593637
Koramangala 8th Block    0.513678
Koramangala 3rd Block    0.462795
Wilson Garden            0.453103
Name: location, dtype: float64
In [101]:
data['location']=np.where(data['location'].isin(imp.index),data['location'],'other')
In [102]:
data['location'].nunique()
Out[102]:
47
In [103]:
values2=data['rest_type'].value_counts()/len(data)*100
values2
Out[103]:
Quick Bites              33.643478
Casual Dining            23.299654
Cafe                      8.163117
Dessert Parlor            4.482566
Delivery                  4.048848
                           ...    
Cafe, Food Court          0.004846
Dessert Parlor, Kiosk     0.004846
Bakery, Food Court        0.004846
Bakery, Beverage Shop     0.002423
Quick Bites, Kiosk        0.002423
Name: rest_type, Length: 87, dtype: float64
In [104]:
data['rest_type'].head(20)
Out[104]:
0           Casual Dining
1           Casual Dining
2     Cafe, Casual Dining
3             Quick Bites
4           Casual Dining
5           Casual Dining
6           Casual Dining
7     Casual Dining, Cafe
8                    Cafe
9                    Cafe
10                   Cafe
11                   Cafe
12                   Cafe
13                   Cafe
14    Cafe, Casual Dining
15                   Cafe
16                   Cafe
17                   Cafe
18                   Cafe
19                   Cafe
Name: rest_type, dtype: object
In [105]:
len(values2[values2>0.3])
Out[105]:
29
In [106]:
threshold=1.5
imp2=values2[values2>1.5]
imp2
Out[106]:
Quick Bites           33.643478
Casual Dining         23.299654
Cafe                   8.163117
Dessert Parlor         4.482566
Delivery               4.048848
Takeaway, Delivery     3.099028
Casual Dining, Bar     2.645926
Bakery                 1.705798
Beverage Shop          1.555572
Bar                    1.550726
Name: rest_type, dtype: float64
In [107]:
imp2.index
Out[107]:
Index(['Quick Bites', 'Casual Dining', 'Cafe', 'Dessert Parlor', 'Delivery',
       'Takeaway, Delivery', 'Casual Dining, Bar', 'Bakery', 'Beverage Shop',
       'Bar'],
      dtype='object')
In [108]:
data['rest_type'].isin(imp2.index)
Out[108]:
0         True
1         True
2        False
3         True
4         True
         ...  
51709     True
51711     True
51712     True
51715     True
51716    False
Name: rest_type, Length: 41271, dtype: bool
In [109]:
data['rest_type']=np.where(data['rest_type'].isin(imp2.index),data['rest_type'],'other')
In [110]:
data['rest_type']
Out[110]:
0             Casual Dining
1             Casual Dining
2                     other
3               Quick Bites
4             Casual Dining
                ...        
51709    Casual Dining, Bar
51711    Casual Dining, Bar
51712                   Bar
51715                   Bar
51716                 other
Name: rest_type, Length: 41271, dtype: object
In [111]:
for feature in cat_features:
    print('{} has total {} unique features'.format(feature, data[feature].nunique()))
online_order has total 2 unique features
book_table has total 2 unique features
location has total 47 unique features
rest_type has total 11 unique features
listed_in(type) has total 7 unique features
listed_in(city) has total 30 unique features
In [112]:
cat_features
Out[112]:
['online_order',
 'book_table',
 'location',
 'rest_type',
 'listed_in(type)',
 'listed_in(city)']
In [113]:
import pandas as pd
data_cat = data[cat_features]
for col in cat_features:
    col_encoded = pd.get_dummies(data_cat[col],prefix=col,drop_first=True)
    data_cat=pd.concat([data_cat,col_encoded],axis=1)
    data_cat.drop(col, axis=1, inplace=True)
In [114]:
data_cat.shape
Out[114]:
(41271, 93)
In [115]:
data_cat.head(10)
Out[115]:
online_order_Yes book_table_Yes location_Banashankari location_Banaswadi location_Bannerghatta Road location_Basavanagudi location_Bellandur location_Brigade Road location_Brookefield location_Church Street ... listed_in(city)_Lavelle Road listed_in(city)_MG Road listed_in(city)_Malleshwaram listed_in(city)_Marathahalli listed_in(city)_New BEL Road listed_in(city)_Old Airport Road listed_in(city)_Rajajinagar listed_in(city)_Residency Road listed_in(city)_Sarjapur Road listed_in(city)_Whitefield
0 1 1 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 1 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 1 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
5 1 0 0 0 0 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
7 1 1 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
8 1 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
9 1 0 1 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

10 rows × 93 columns

In [116]:
data_cat.shape
Out[116]:
(41271, 93)
In [117]:
data.head()
Out[117]:
online_order book_table location rest_type multiple_types total_cuisines listed_in(type) listed_in(city) approx_cost(for two people) target
0 Yes Yes Banashankari Casual Dining 1 3 Buffet Banashankari 800.0 1
1 Yes No Banashankari Casual Dining 1 3 Buffet Banashankari 800.0 1
2 Yes No Banashankari other 2 3 Buffet Banashankari 800.0 1
3 No No Banashankari Quick Bites 1 2 Buffet Banashankari 300.0 0
4 No No Basavanagudi Casual Dining 1 2 Buffet Banashankari 600.0 1
In [118]:
data_final=pd.concat([data.loc[:,['multiple_types','total_cuisines','approx_cost(for two people)','target']],data_cat],axis=1)
In [119]:
data_final.shape
Out[119]:
(41271, 97)
In [120]:
# Splitting the data
X = data_final.drop('target', axis=1)
y = data_final['target'].values
In [121]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20, random_state=42)
In [122]:
X_train.shape
Out[122]:
(33016, 96)
In [123]:
# Import the random forest model.
from sklearn.ensemble import RandomForestClassifier
In [124]:
# Initialize the model with some parameters.
model = RandomForestClassifier(n_estimators=100, min_samples_leaf=10, random_state=1)
In [125]:
model.fit(X_train, y_train)
Out[125]:
RandomForestClassifier(min_samples_leaf=10, random_state=1)
In [126]:
# Make predictions.
predictions = model.predict(X_test)

# Compute the error.
from sklearn.metrics import confusion_matrix
confusion_matrix(predictions, y_test)
Out[126]:
array([[3556, 1283],
       [ 696, 2720]], dtype=int64)
In [127]:
from sklearn.metrics import accuracy_score
accuracy_score(predictions,y_test)
Out[127]:
0.7602665051483949
In [129]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
In [130]:
models = []
models.append(('LogisticRegression', LogisticRegression()))
models.append(('Naive Bayes',GaussianNB()))
models.append(('RandomForest', RandomForestClassifier()))
models.append(('Decision Tree', DecisionTreeClassifier()))
models.append(('KNN', KNeighborsClassifier(n_neighbors = 5)))
In [131]:
for name,model in models:
    print(name)
    print(models)
LogisticRegression
[('LogisticRegression', LogisticRegression()), ('Naive Bayes', GaussianNB()), ('RandomForest', RandomForestClassifier()), ('Decision Tree', DecisionTreeClassifier()), ('KNN', KNeighborsClassifier())]
Naive Bayes
[('LogisticRegression', LogisticRegression()), ('Naive Bayes', GaussianNB()), ('RandomForest', RandomForestClassifier()), ('Decision Tree', DecisionTreeClassifier()), ('KNN', KNeighborsClassifier())]
RandomForest
[('LogisticRegression', LogisticRegression()), ('Naive Bayes', GaussianNB()), ('RandomForest', RandomForestClassifier()), ('Decision Tree', DecisionTreeClassifier()), ('KNN', KNeighborsClassifier())]
Decision Tree
[('LogisticRegression', LogisticRegression()), ('Naive Bayes', GaussianNB()), ('RandomForest', RandomForestClassifier()), ('Decision Tree', DecisionTreeClassifier()), ('KNN', KNeighborsClassifier())]
KNN
[('LogisticRegression', LogisticRegression()), ('Naive Bayes', GaussianNB()), ('RandomForest', RandomForestClassifier()), ('Decision Tree', DecisionTreeClassifier()), ('KNN', KNeighborsClassifier())]
In [132]:
for name, model in models:
    print(name)
    model.fit(X_train, y_train)
    
    # Make predictions.
    predictions = model.predict(X_test)

    # Compute the error.
    from sklearn.metrics import confusion_matrix
    print(confusion_matrix(predictions, y_test))

    from sklearn.metrics import accuracy_score
    print(accuracy_score(predictions,y_test))
    print('\n')
LogisticRegression
[[3462 1523]
 [ 790 2480]]
0.7198061780738946


Naive Bayes
[[3040 1460]
 [1212 2543]]
0.6763173834039976


RandomForest
[[3502  936]
 [ 750 3067]]
0.7957601453664446


Decision Tree
[[3634  811]
 [ 618 3192]]
0.826892792247123


KNN
[[3623 1006]
 [ 629 2997]]
0.8019382192610539


In [ ]: